##
# Create Figure 3

### Prep ###

.libPaths( c(.libPaths(), ""))

rm(list=ls())
setwd('')
if (!dir.exists('../2019output/ssn_rd'))   dir.create('../2019output/ssn_rd')
filepath = ''
source("methods.R")


### Helpful methods ###
days_to_base = function(data)  {
   data = data[month(dob) <= 6, base_day := paste0(as.character(year(dob)), "-01-02")]
   data = data[month(dob) >= 7, base_day := paste0(as.character(year(dob) + 1), "-01-02")]
   data = data[, base_day := as.Date(base_day)]
   data = data[, days_since := as.numeric(difftime(dob, base_day, units = 'days'))]
   return(data)
}

normalize_rd = function(rd, ROUND)  {
   rd = rd[days_since == -ROUND, ':='(claim_age_norm = claim_age, retire_age_norm = retire_age)]
   rd = rd[, ':='(claim_age_norm = max(claim_age_norm, na.rm = T), 
                  retire_age_norm = max(retire_age_norm, na.rm = T)), by = 'period']
   rd = rd[, ':='(claim_age_norm = claim_age - claim_age_norm, 
                  retire_age_norm = retire_age - retire_age_norm)]
}
   

### Load data ###

datafile = paste0('../2019data/by_ssn.csv')
dt = data.table( fread(datafile) )
dt = dt[claim_age >= 61.5 & claim_age <= 70]
#dt = dt[claim_age < 61.5 | claim_age > 70, claim_age := NA]     

dt = dt[, dob := as.Date(dob, format = '%d%b%Y')]
dt = days_to_base(dt)
dt = dt[abs(days_since) <= 90 & abs(days_since) >= 2,]

dt = dt[year(base_day) >= 1932 & year(base_day) <= 1943,]
dt = dt[year(base_day) <= 1937, period := '1932-37'][year(base_day) >= 1938, period := '1938-43']
dt = dt[, ':='(	 claimage62 = as.numeric(claim_age >= 62 & claim_age < 63), 
			 claimage63 = as.numeric(claim_age >= 63 & claim_age < 64), 
			 claimage64 = as.numeric(claim_age >= 64 & claim_age < 65), 
			 claimage65 = as.numeric(claim_age >= 65 & claim_age < 66), 
			 claimage66 = as.numeric(claim_age >= 66 & claim_age < 67), 
			 claimage67 = as.numeric(claim_age >= 67 & claim_age < 68),
			 claimagenew2mo = as.numeric(
				(claim_age >= 65 		& claim_age < 65.16 	& year(base_day) <= 1937) |
				(claim_age >= 65.16 	& claim_age < 65.33 	& year(base_day) == 1938) |
				(claim_age >= 65.33 	& claim_age < 65.5 	& year(base_day) == 1939) |
				(claim_age >= 65.5	& claim_age < 65.66	& year(base_day) == 1940) |
				(claim_age >= 65.66	& claim_age < 65.83 	& year(base_day) == 1941) | 
				(claim_age >= 65.83	& claim_age < 66		& year(base_day) == 1942) |
				(claim_age >= 66		& claim_age < 66.16	& year(base_day) >= 1943) ),
			claimagenewgt = as.numeric(
				(claim_age >= 65 		& year(base_day) <= 1937) |
				(claim_age >= 65.16 	& year(base_day) == 1938) |
				(claim_age >= 65.33 	& year(base_day) == 1939) |
				(claim_age >= 65.5	& year(base_day) == 1940) |
				(claim_age >= 65.66	& year(base_day) == 1941) | 
				(claim_age >= 65.83	& year(base_day) == 1942) |
				(claim_age >= 66		& year(base_day) >= 1943) ),
			 male = as.numeric(sex == "M"))]



# Alternative definition of earnings: Use same calendar year for each side of cutoff #
dt = dt[, ':='(	alt_sern62 = ifelse(days_since > 0, sern62, sern63),
			alt_sern63 = ifelse(days_since > 0, sern63, sern64),
			alt_sern64 = ifelse(days_since > 0, sern64, sern65),
			alt_sern65 = ifelse(days_since > 0, sern65, sern66),
			alt_sern66 = ifelse(days_since > 0, sern66, sern67))]


summary(dt)


### Create RD figures ###

ROUND = 10

rd = copy(dt)[, days_since := ROUND*round(days_since/ROUND)]
rd = rd[year(base_day) >= 1938 & year(base_day) <= 1943,]
rd = rd[, .(num = sum(num), 
	claim_age = weighted.mean(claim_age, num, na.rm = T), 
	claimage62 = weighted.mean(claimage62, num, na.rm = T), 
	claimage63 = weighted.mean(claimage63, num, na.rm = T), 
	claimage64 = weighted.mean(claimage64, num, na.rm = T), 
	claimage65 = weighted.mean(claimage65, num, na.rm = T), 
	claimage66 = weighted.mean(claimage66, num, na.rm = T), 
	claimagenew2mo = weighted.mean(claimagenew2mo, num, na.rm = T),
	claimagenewgt = weighted.mean(claimagenewgt, num, na.rm = T),
	retire_age = weighted.mean(retire_age, num, na.rm = T),
	sern62 = weighted.mean(sern62,num, na.rm = T),
	sern63 = weighted.mean(sern63,num, na.rm = T),
	sern64 = weighted.mean(sern64,num, na.rm = T),
	sern65 = weighted.mean(sern65,num, na.rm = T),
	sern66 = weighted.mean(sern66,num, na.rm = T),
	alt_sern62 = weighted.mean(alt_sern62,num, na.rm = T),
	alt_sern63 = weighted.mean(alt_sern63,num, na.rm = T),
	alt_sern64 = weighted.mean(alt_sern64,num, na.rm = T),
	alt_sern65 = weighted.mean(alt_sern65,num, na.rm = T),
	alt_sern66 = weighted.mean(alt_sern66,num, na.rm = T),
	life_income = weighted.mean(life_income,num, na.rm = T)), 
        by = c('period', 'days_since')]
rd = rd[num >= 10, ]
rd = normalize_rd(rd, ROUND)

summary(rd)


# RD graphs #
for (var in c("claim_age_norm", "claimage62", "claimage63", "claimage64", "claimage65", "claimage66", "claimagenew2mo", "claimagenewgt", "life_income", "alt_sern62", "alt_sern63", "alt_sern64", "alt_sern65", "alt_sern66")) { 
	eval(parse(text = paste0("ggplot(data = rd, aes(x = days_since, y = ",var,", color = factor(period), size = num)) + geom_point(shape = 1) +
  	geom_vline(xintercept = 0, color = 'black', lty = 'dashed') + th + scale_color_manual(values = colpal) + guides(size = F) +
	labs(title = 'RD on ",var,"', x = 'Birthday - Jan 2', y = '",var,"', color = 'Period')")))

	eval(parse(text = paste0("ggsave(paste0(filepath, '/Figure3_",var,".png'), height = 4, width = 6)")))
}



